/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis.core;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AutomatonToTokenStream;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.tests.analysis.CannedTokenStream;
import org.apache.lucene.tests.analysis.MockTokenizer;
import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Transition;

public class TestFlattenGraphFilter extends BaseTokenStreamTestCase {

  private static Token token(
      String term, int posInc, int posLength, int startOffset, int endOffset) {
    // Convenience factory: build a Token and set the two graph-related
    // attributes (position increment and position length) in one call.
    Token result = new Token(term, startOffset, endOffset);
    result.setPositionIncrement(posInc);
    result.setPositionLength(posLength);
    return result;
  }

  public void testSimpleMock() throws Exception {
    // A plain linear token stream should pass through FlattenGraphFilter unchanged.
    Analyzer analyzer =
        new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer(MockTokenizer.SIMPLE, true);
            return new TokenStreamComponents(source, new FlattenGraphFilter(source));
          }
        };

    assertAnalyzesTo(
        analyzer,
        "wtf happened",
        new String[] {"wtf", "happened"},
        new int[] {0, 4},
        new int[] {3, 12},
        null,
        new int[] {1, 1},
        new int[] {1, 1},
        true);
  }

  // Make sure graph is unchanged if it's already flat
  public void testAlreadyFlatten() throws Exception {
    // Every token already has posLength=1, so the graph is flat on input.
    TokenStream graph =
        new CannedTokenStream(
            0,
            12,
            token("wtf", 1, 1, 0, 3),
            token("what", 0, 1, 0, 3),
            token("wow", 0, 1, 0, 3),
            token("the", 1, 1, 0, 3),
            token("that's", 0, 1, 0, 3),
            token("fudge", 1, 1, 0, 3),
            token("funny", 0, 1, 0, 3),
            token("happened", 1, 1, 4, 12));

    // Expect identical token attributes on output: wtf/what/wow that's/the fudge/funny happened
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened"},
        new int[] {0, 0, 0, 0, 0, 0, 0, 4},
        new int[] {3, 3, 3, 3, 3, 3, 3, 12},
        new int[] {1, 0, 0, 1, 0, 1, 0, 1},
        new int[] {1, 1, 1, 1, 1, 1, 1, 1},
        12);
  }

  public void testWTF1() throws Exception {

    // "wow that's funny" and "what the fudge" are separate side paths, in parallel with "wtf", on
    // input:
    TokenStream graph =
        new CannedTokenStream(
            0,
            12,
            token("wtf", 1, 5, 0, 3),
            token("what", 0, 1, 0, 3),
            token("wow", 0, 3, 0, 3),
            token("the", 1, 1, 0, 3),
            token("fudge", 1, 3, 0, 3),
            token("that's", 1, 1, 0, 3),
            token("funny", 1, 1, 0, 3),
            token("happened", 1, 1, 4, 12));

    // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened:
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened"},
        new int[] {0, 0, 0, 0, 0, 0, 0, 4},
        new int[] {3, 3, 3, 3, 3, 3, 3, 12},
        new int[] {1, 0, 0, 1, 0, 1, 0, 1},
        new int[] {3, 1, 1, 1, 1, 1, 1, 1},
        12);
  }

  /** Same as testWTF1 except the "wtf" token comes out later */
  public void testWTF2() throws Exception {

    // "wow that's funny" and "what the fudge" are separate side paths, in parallel with "wtf", on
    // input:
    TokenStream graph =
        new CannedTokenStream(
            0,
            12,
            token("what", 1, 1, 0, 3),
            token("wow", 0, 3, 0, 3),
            token("wtf", 0, 5, 0, 3),
            token("the", 1, 1, 0, 3),
            token("fudge", 1, 3, 0, 3),
            token("that's", 1, 1, 0, 3),
            token("funny", 1, 1, 0, 3),
            token("happened", 1, 1, 4, 12));

    // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened:
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"what", "wow", "wtf", "the", "that's", "fudge", "funny", "happened"},
        new int[] {0, 0, 0, 0, 0, 0, 0, 4},
        new int[] {3, 3, 3, 3, 3, 3, 3, 12},
        new int[] {1, 0, 0, 1, 0, 1, 0, 1},
        new int[] {1, 1, 3, 1, 1, 1, 1, 1},
        12);
  }

  public void testNonGreedySynonyms() throws Exception {
    // This is just "hypothetical" for Lucene today, because SynFilter is
    // greedy: when two syn rules match on overlapping tokens, only one
    // (greedily) wins.  This test pretends all syn matches could match:

    TokenStream in =
        new CannedTokenStream(
            0,
            20,
            token("wizard", 1, 1, 0, 6),
            token("wizard_of_oz", 0, 3, 0, 12),
            token("of", 1, 1, 7, 9),
            token("oz", 1, 1, 10, 12),
            token("oz_screams", 0, 2, 10, 20),
            token("screams", 1, 1, 13, 20));

    TokenStream out = new FlattenGraphFilter(in);

    // ... on output, the overlapping multi-token synonyms stay stacked over the
    // single tokens they span: wizard/wizard_of_oz of oz/oz_screams screams
    assertTokenStreamContents(
        out,
        new String[] {"wizard", "wizard_of_oz", "of", "oz", "oz_screams", "screams"},
        new int[] {0, 0, 7, 10, 10, 13},
        new int[] {6, 12, 9, 12, 20, 20},
        new int[] {1, 0, 1, 1, 0, 1},
        new int[] {1, 3, 1, 1, 2, 1},
        20);
  }

  public void testNonGraph() throws Exception {
    // Four sequential single-position tokens: no graph structure at all.
    TokenStream in =
        new CannedTokenStream(
            0,
            22,
            token("hello", 1, 1, 0, 5),
            token("pseudo", 1, 1, 6, 12),
            token("world", 1, 1, 13, 18),
            token("fun", 1, 1, 19, 22));

    TokenStream out = new FlattenGraphFilter(in);

    // ... a non-graph stream must pass through completely unchanged:
    assertTokenStreamContents(
        out,
        new String[] {"hello", "pseudo", "world", "fun"},
        new int[] {0, 6, 13, 19},
        new int[] {5, 12, 18, 22},
        new int[] {1, 1, 1, 1},
        new int[] {1, 1, 1, 1},
        22);
  }

  public void testSimpleHole() throws Exception {
    // "hole" has posInc=2: exactly one deleted token sits between "hello" and "hole".
    TokenStream graph =
        new CannedTokenStream(
            0,
            13,
            token("hello", 1, 1, 0, 5),
            token("hole", 2, 1, 6, 10),
            token("fun", 1, 1, 11, 13));

    // The single-position hole must survive flattening untouched.
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"hello", "hole", "fun"},
        new int[] {0, 6, 11},
        new int[] {5, 10, 13},
        new int[] {1, 2, 1},
        new int[] {1, 1, 1},
        13);
  }

  public void testHoleUnderSyn() throws Exception {
    // Tests a StopFilter after SynFilter where a stopword in a syn is removed
    //
    //   wizard of oz -> woz syn, but then "of" becomes a hole

    TokenStream graph =
        new CannedTokenStream(
            0,
            12,
            token("wizard", 1, 1, 0, 6),
            token("woz", 0, 3, 0, 12),
            token("oz", 2, 1, 10, 12));

    // "woz" keeps spanning all three positions; the hole before "oz" is preserved.
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"wizard", "woz", "oz"},
        new int[] {0, 0, 10},
        new int[] {6, 12, 12},
        new int[] {1, 0, 2},
        new int[] {1, 3, 1},
        12);
  }

  public void testStrangelyNumberedNodes() throws Exception {

    // Uses only nodes 0, 2, 3, i.e. 1 is just never used (it is not a hole!!)
    TokenStream graph =
        new CannedTokenStream(
            0,
            27,
            token("dog", 1, 3, 0, 5),
            token("puppy", 0, 3, 0, 5),
            token("flies", 3, 1, 6, 11));

    // After flattening, the unused node is compacted away: posLengths collapse to 1.
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"dog", "puppy", "flies"},
        new int[] {0, 0, 6},
        new int[] {5, 5, 11},
        new int[] {1, 0, 1},
        new int[] {1, 1, 1},
        27);
  }

  public void testTwoLongParallelPaths() throws Exception {

    // "a a a a a a" in parallel with "b b b b b b"
    TokenStream graph =
        new CannedTokenStream(
            0,
            11,
            token("a", 1, 1, 0, 1),
            token("b", 0, 2, 0, 1),
            token("a", 1, 2, 2, 3),
            token("b", 1, 2, 2, 3),
            token("a", 1, 2, 4, 5),
            token("b", 1, 2, 4, 5),
            token("a", 1, 2, 6, 7),
            token("b", 1, 2, 6, 7),
            token("a", 1, 2, 8, 9),
            token("b", 1, 2, 8, 9),
            token("a", 1, 2, 10, 11),
            token("b", 1, 2, 10, 11));

    // ... becomes flattened to a single path with overlapping a/b token between each node:
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"a", "b", "a", "b", "a", "b", "a", "b", "a", "b", "a", "b"},
        new int[] {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10},
        new int[] {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11},
        new int[] {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
        new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
        11);
  }

  // b has a posInc of 1, which is correct, but no edge ever visited that node.
  // After hole recovery 'b' and 'c' should still be under 'abc'
  // assert disabled = pos length of abc = 4
  // assert enabled = AssertionError: outputEndNode=3 vs inputTo=2
  public void testAltPathFirstStepHole() throws Exception {
    TokenStream graph =
        new CannedTokenStream(
            0, 3, token("abc", 1, 3, 0, 3), token("b", 1, 1, 1, 2), token("c", 1, 1, 2, 3));

    // 'abc' keeps posLength 3 so that 'b' and 'c' remain inside its span.
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"abc", "b", "c"},
        new int[] {0, 1, 2},
        new int[] {3, 2, 3},
        new int[] {1, 1, 1},
        new int[] {3, 1, 1},
        3);
  }

  // Last node in an alt path fixes output node of long path. In this graph the follow-up node fixes
  // that.
  // incorrect pos length of abc = 1
  public void testAltPathLastStepHole() throws Exception {
    TokenStream graph =
        new CannedTokenStream(
            0,
            4,
            token("abc", 1, 3, 0, 3),
            token("a", 0, 1, 0, 1),
            token("b", 1, 1, 1, 2),
            token("d", 2, 1, 3, 4));

    // 'abc' must keep posLength 3; the hole before 'd' remains a single gap.
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"abc", "a", "b", "d"},
        new int[] {0, 0, 1, 3},
        new int[] {1, 1, 2, 4},
        new int[] {1, 0, 1, 2},
        new int[] {3, 1, 1, 1},
        4);
  }

  // Check to see how multiple holes in a row are preserved.
  public void testLongHole() throws Exception {
    // posInc=5 encodes four consecutive deleted tokens before "hole".
    TokenStream graph =
        new CannedTokenStream(
            0,
            28,
            token("hello", 1, 1, 0, 5),
            token("hole", 5, 1, 20, 24),
            token("fun", 1, 1, 25, 28));

    // Flattening collapses the long run of holes down to a single gap (posInc=2).
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"hello", "hole", "fun"},
        new int[] {0, 20, 25},
        new int[] {5, 24, 28},
        new int[] {1, 2, 1},
        new int[] {1, 1, 1},
        28);
  }

  // multiple nodes missing in the alt path.
  // assert disabled = nothing
  // assert enabled = AssertionError
  public void testAltPathLastStepLongHole() throws Exception {
    TokenStream graph =
        new CannedTokenStream(
            0, 4, token("abc", 1, 3, 0, 3), token("a", 0, 1, 0, 1), token("d", 3, 1, 3, 4));

    // 'abc' shrinks to posLength 2 and the trailing holes collapse to posInc=2 on 'd'.
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"abc", "a", "d"},
        new int[] {0, 0, 3},
        new int[] {1, 1, 4},
        new int[] {1, 0, 2},
        new int[] {2, 1, 1},
        4);
  }

  // LUCENE-8723
  // Token stream ends without any edge to fix the long edge's output node
  // assert disabled = dropped token
  // assert enabled = AssertionError: 2
  public void testAltPathLastStepHoleWithoutEndToken() throws Exception {
    TokenStream graph =
        new CannedTokenStream(
            0, 2, token("abc", 1, 3, 0, 3), token("a", 0, 1, 0, 1), token("b", 1, 1, 1, 2));

    // All three tokens must survive; nothing may be dropped at end-of-stream.
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"abc", "a", "b"},
        new int[] {0, 0, 1},
        new int[] {1, 1, 2},
        new int[] {1, 0, 1},
        new int[] {1, 1, 1},
        2);
  }

  // similar to AltPathLastStepHoleWithoutEndToken, but instead of no token to trigger long path
  // resolution,
  // the next token has no way to reference to the long path so we have to resolve as if that last
  // token wasn't present.
  public void testAltPathLastStepHoleFollowedByHole() throws Exception {
    TokenStream graph =
        new CannedTokenStream(
            0, 5, token("abc", 1, 3, 0, 3), token("b", 1, 1, 1, 2), token("e", 3, 1, 4, 5));

    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"abc", "b", "e"},
        new int[] {0, 1, 4},
        new int[] {3, 2, 5},
        new int[] {1, 1, 2},
        new int[] {1, 1, 1},
        5);
  }

  // Two Shingled long paths pass each other which gives a flattened graph with tokens backing up a
  // lot.
  public void testShingledGap() throws Exception {
    TokenStream graph =
        new CannedTokenStream(
            0,
            5,
            token("abc", 1, 3, 0, 3),
            token("a", 0, 1, 0, 1),
            token("b", 1, 1, 1, 2),
            token("cde", 1, 3, 2, 5),
            token("d", 1, 1, 3, 4),
            token("e", 1, 1, 4, 5));

    // Note: flattening reorders the output ("d" is emitted before "b").
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"abc", "a", "d", "b", "cde", "e"},
        new int[] {0, 0, 3, 3, 4, 4},
        new int[] {1, 1, 3, 3, 5, 5},
        new int[] {1, 0, 1, 0, 1, 0},
        new int[] {1, 1, 1, 1, 1, 1},
        5);
  }

  // With shingles, token order may change during flattening.
  // We need to be careful not to free input nodes if they still have unreleased edges.
  // with/without exceptions ArrayIndexOutOfBoundsException
  public void testShingledGapWithHoles() throws Exception {
    TokenStream graph =
        new CannedTokenStream(
            0,
            5,
            token("abc", 1, 3, 0, 3),
            token("b", 1, 1, 1, 2),
            token("cde", 1, 3, 2, 5),
            token("d", 1, 1, 3, 4),
            token("e", 1, 1, 4, 5));

    // Same shape as testShingledGap but without the leading "a"; must not throw.
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"abc", "d", "b", "cde", "e"},
        new int[] {0, 3, 3, 4, 4},
        new int[] {3, 3, 3, 5, 5},
        new int[] {1, 1, 0, 1, 0},
        new int[] {1, 1, 1, 1, 1},
        5);
  }

  // When the first token is a hole there is no original token to offset from.
  public void testFirstTokenHole() throws Exception {
    TokenStream graph = new CannedTokenStream(0, 9, token("start", 2, 1, 0, 5));

    // The leading hole (posInc=2) must be preserved on the very first token.
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"start"},
        new int[] {0},
        new int[] {5},
        new int[] {2},
        new int[] {1},
        9);
  }

  // The shingled token starts from a hole.
  // Hole recovery will cause the shingled token to start later in the output than its alternate
  // paths.
  // This will result in it being released too early.
  public void testShingleFromGap() throws Exception {
    TokenStream graph =
        new CannedTokenStream(
            0,
            9,
            token("a", 1, 1, 4, 8),
            token("abc", 0, 3, 4, 7),
            token("cd", 2, 2, 6, 8),
            token("d", 1, 1, 7, 8),
            token("e", 1, 1, 8, 9));

    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"a", "abc", "d", "cd", "e"},
        new int[] {4, 4, 7, 7, 8},
        new int[] {7, 7, 8, 8, 9},
        new int[] {1, 0, 1, 1, 1},
        new int[] {1, 1, 2, 1, 1},
        9);
  }

  public void testShingledGapAltPath() throws Exception {
    // "abcd" fully covers the "abc" + hole + "cd" alternate path.
    TokenStream graph =
        new CannedTokenStream(
            0, 4, token("abc", 1, 3, 0, 3), token("abcd", 0, 4, 0, 4), token("cd", 2, 2, 2, 4));

    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"abc", "abcd", "cd"},
        new int[] {0, 0, 2},
        new int[] {3, 4, 4},
        new int[] {1, 0, 1},
        new int[] {1, 2, 1},
        4);
  }

  // Lots of shingles and alternate paths connecting to each other. One edge 'c' missing between
  // 'ab' and 'def'
  public void testHeavilyConnectedGraphWithGap() throws IOException {
    TokenStream graph =
        new CannedTokenStream(
            0,
            7,
            token("a", 1, 1, 0, 1),
            token("ab", 0, 2, 0, 2),
            token("abcdef", 0, 6, 0, 6),
            token("abcd", 0, 4, 0, 4),
            token("bcdef", 1, 5, 1, 7),
            token("def", 2, 3, 4, 7),
            token("e", 1, 1, 5, 6),
            token("f", 1, 1, 6, 7));

    // Flattening reorders "e" ahead of "def" and compresses the posLengths.
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"a", "ab", "abcdef", "abcd", "bcdef", "e", "def", "f"},
        new int[] {0, 0, 0, 0, 5, 5, 6, 6},
        new int[] {1, 1, 7, 1, 7, 6, 7, 7},
        new int[] {1, 0, 0, 0, 1, 0, 1, 0},
        new int[] {1, 1, 3, 1, 2, 1, 1, 1},
        7);
  }

  // This graph can create a disconnected input node that is farther ahead in the output than its
  // subsequent input node.
  // Exceptions: Free too early or dropped tokens.
  public void testShingleWithLargeLeadingGap() throws IOException {
    TokenStream graph =
        new CannedTokenStream(
            0, 6, token("abcde", 1, 5, 0, 5), token("ef", 4, 2, 4, 6), token("f", 1, 1, 5, 6));

    // "ef" and "f" end up stacked on the same output position; no token is lost.
    assertTokenStreamContents(
        new FlattenGraphFilter(graph),
        new String[] {"abcde", "f", "ef"},
        new int[] {0, 5, 5},
        new int[] {5, 6, 6},
        new int[] {1, 1, 0},
        new int[] {1, 1, 1},
        6);
  }

  /**
   * build CharsRef containing 2-3 tokens
   *
   * @param tokens vocabulary of tokens
   * @param charsRefBuilder CharsRefBuilder used as scratch; caller is responsible for clearing it
   * @param random Random for selecting tokens
   * @return CharsRef containing 2-3 space-joined tokens.
   */
  private CharsRef buildMultiTokenCharsRef(
      String[] tokens, CharsRefBuilder charsRefBuilder, Random random) {
    // nextInt(2) + 2 yields 2 or 3 tokens per entry.
    int srcLen = random.nextInt(2) + 2;
    String[] srcTokens = new String[srcLen];
    for (int pos = 0; pos < srcLen; pos++) {
      // Use the caller-supplied Random; the previous code called the static random()
      // helper here, silently ignoring the 'random' parameter.
      srcTokens[pos] = tokens[random.nextInt(tokens.length)];
    }
    SynonymMap.Builder.join(srcTokens, charsRefBuilder);
    return charsRefBuilder.toCharsRef();
  }

  // Create a random graph then delete some edges to see if we can trip up FlattenGraphFilter
  public void testRandomGraphs() throws Exception {
    // Base tokens appear in the text; syn tokens only appear via the synonym map.
    String[] baseTokens = new String[] {"t1", "t2", "t3", "t4"};
    String[] synTokens = new String[] {"s1", "s2", "s3", "s4"};

    SynonymMap.Builder mapBuilder = new SynonymMap.Builder();
    CharsRefBuilder charRefBuilder = new CharsRefBuilder();
    Random random = random();

    // between 10 and 20 synonym entries
    int synCount = random.nextInt(10) + 10;
    for (int i = 0; i < synCount; i++) {
      // Randomly pick the mapping arity: 1:1, many:1, 1:many, many:many.
      int type = random.nextInt(4);
      CharsRef src;
      CharsRef dest;
      switch (type) {
        case 0:
          // 1:1
          src = charRefBuilder.append(baseTokens[random.nextInt(baseTokens.length)]).toCharsRef();
          charRefBuilder.clear();
          dest = charRefBuilder.append(synTokens[random.nextInt(synTokens.length)]).toCharsRef();
          charRefBuilder.clear();
          break;
        case 1:
          // many:1
          src = buildMultiTokenCharsRef(baseTokens, charRefBuilder, random);
          charRefBuilder.clear();
          dest = charRefBuilder.append(synTokens[random.nextInt(synTokens.length)]).toCharsRef();
          charRefBuilder.clear();
          break;
        case 2:
          // 1:many
          src = charRefBuilder.append(baseTokens[random.nextInt(baseTokens.length)]).toCharsRef();
          charRefBuilder.clear();
          dest = buildMultiTokenCharsRef(synTokens, charRefBuilder, random);
          charRefBuilder.clear();
          break;
        default:
          // many:many
          src = buildMultiTokenCharsRef(baseTokens, charRefBuilder, random);
          charRefBuilder.clear();
          dest = buildMultiTokenCharsRef(synTokens, charRefBuilder, random);
          charRefBuilder.clear();
      }
      mapBuilder.add(src, dest, true);
    }

    SynonymMap synMap = mapBuilder.build();

    // Stopwords punch holes into the graph; pick 1-4 from either vocabulary.
    int stopWordCount = random.nextInt(4) + 1;
    CharArraySet stopWords = new CharArraySet(stopWordCount, true);
    while (stopWords.size() < stopWordCount) {
      int index = random.nextInt(baseTokens.length + synTokens.length);
      String[] tokenArray = baseTokens;
      if (index >= baseTokens.length) {
        index -= baseTokens.length;
        tokenArray = synTokens;
      }
      stopWords.add(tokenArray[index]);
    }

    // Pipeline under test: synonyms create a graph, stopwords delete edges, then flatten.
    Analyzer withFlattenGraph =
        new Analyzer() {
          @Override
          protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer in = new WhitespaceTokenizer();
            TokenStream result = new SynonymGraphFilter(in, synMap, true);
            result = new StopFilter(result, stopWords);
            result = new FlattenGraphFilter(result);
            return new TokenStreamComponents(in, result);
          }
        };

    // 20-40 whitespace-separated base tokens of input text.
    int tokenCount = random.nextInt(20) + 20;
    List<String> stringTokens = new ArrayList<>();
    while (stringTokens.size() < tokenCount) {
      stringTokens.add(baseTokens[random.nextInt(baseTokens.length)]);
    }

    String text = String.join(" ", stringTokens);
    // FlattenGraphFilter can create inconsistent offsets.
    // If that is resolved we can check offsets
    // Until then converting to automaton will pull text through and check if we hit asserts.
    // checkAnalysisConsistency(random, withFlattenGraph, false, text);
    TokenStreamToAutomaton tsta = new TokenStreamToAutomaton();
    TokenStream flattenedTokenStream = withFlattenGraph.tokenStream("field", text);
    assertFalse(Operations.hasDeadStates(tsta.toAutomaton(flattenedTokenStream)));
    flattenedTokenStream.close();

    /*
     *  CheckGeneralization can get VERY slow as matching holes to tokens or other holes generates a lot of potentially valid paths.
     *  Analyzer withoutFlattenGraph =
     *      new Analyzer() {
     *        @Override
     *        protected TokenStreamComponents createComponents(String fieldName) {
     *          Tokenizer in = new WhitespaceTokenizer();
     *          TokenStream result = new SynonymGraphFilter(in, synMap, true);
     *          result = new StopFilter(result, stopWords);
     *          return new TokenStreamComponents(in, result);
     *        }
     *      };
     *  checkGeneralization(
     *      withFlattenGraph.tokenStream("field", text),
     *      withoutFlattenGraph.tokenStream("field", text));
     *
     */
  }

  /*
   * Make some strings, make an automaton that accepts those strings, convert that automaton into a TokenStream,
   * flatten it, back to an automaton, and see if the original strings are still accepted.
   */
  public void testPathsNotLost() throws IOException {
    // 5-9 random words, each 5-9 bytes drawn from 'A'..'E'.
    int wordCount = random().nextInt(5) + 5;
    List<BytesRef> acceptStrings = new LinkedList<>();
    for (int i = 0; i < wordCount; i++) {
      int wordLen = random().nextInt(5) + 5;
      BytesRef ref = new BytesRef(wordLen);
      ref.length = wordLen;
      ref.offset = 0;
      for (int j = 0; j < wordLen; j++) {
        ref.bytes[j] = (byte) (random().nextInt(5) + 65);
      }
      acceptStrings.add(ref);
    }
    // makeStringUnion requires its input in sorted order.
    acceptStrings.sort(Comparator.naturalOrder());

    // NOTE(review): limit(wordCount) looks like a no-op — the list already holds exactly
    // wordCount entries; possibly it was meant to dedupe after sorting. TODO confirm.
    acceptStrings = acceptStrings.stream().limit(wordCount).toList();
    Automaton nonFlattenedAutomaton = Automata.makeStringUnion(acceptStrings);

    // Round-trip: automaton -> token stream -> flatten -> automaton.
    TokenStream ts = AutomatonToTokenStream.toTokenStream(nonFlattenedAutomaton);
    TokenStream flattenedTokenStream = new FlattenGraphFilter(ts);
    TokenStreamToAutomaton tsta = new TokenStreamToAutomaton();
    Automaton flattenedAutomaton = tsta.toAutomaton(flattenedTokenStream);

    // TokenStreamToAutomaton adds position increment transitions into the automaton.
    List<BytesRef> acceptStringsWithPosSep = createAcceptStringsWithPosSep(acceptStrings);

    // Every original string (with POS_SEP interleaved) must still be accepted.
    for (BytesRef acceptString : acceptStringsWithPosSep) {
      assertTrue(
          "string not accepted " + acceptString.utf8ToString(),
          recursivelyValidate(acceptString, 0, 0, flattenedAutomaton));
    }
  }

  /**
   * Interleaves a POS_SEP byte between consecutive bytes of each string, matching the transition
   * layout produced by TokenStreamToAutomaton.
   *
   * @param acceptStrings Byte refs of accepted strings. Each byte is a transition
   * @return List of ByteRefs where each byte is separated by a POS_SEP byte.
   */
  private List<BytesRef> createAcceptStringsWithPosSep(List<BytesRef> acceptStrings) {
    List<BytesRef> result = new ArrayList<>();
    for (BytesRef source : acceptStrings) {
      // n bytes plus (n - 1) separators.
      int sepLen = source.length * 2 - 1;
      BytesRef separated = new BytesRef(sepLen);
      separated.length = sepLen;
      separated.offset = 0;
      for (int i = 0; i < source.length; i++) {
        separated.bytes[2 * i] = source.bytes[i];
        // No separator after the final byte.
        if (2 * i + 1 < sepLen) {
          separated.bytes[2 * i + 1] = TokenStreamToAutomaton.POS_SEP;
        }
      }
      result.add(separated);
    }
    return result;
  }

  /**
   * Checks if acceptString is accepted by the automaton. Automaton may be an NFA.
   *
   * @param acceptString String to test
   * @param acceptStringIndex current index into acceptString, initial value should be 0
   * @param state state to transition from. initial value should be 0
   * @param automaton Automaton to test
   * @return true if acceptString is accepted by the automaton. otherwise false.
   */
  public boolean recursivelyValidate(
      BytesRef acceptString, int acceptStringIndex, int state, Automaton automaton) {
    // Whole string consumed: accept iff we landed on an accept state.
    if (acceptStringIndex == acceptString.length) {
      return automaton.isAccept(state);
    }

    byte wanted = acceptString.bytes[acceptStringIndex];
    Transition scratch = new Transition();
    automaton.initTransition(state, scratch);
    int transitionCount = automaton.getNumTransitions(state);
    // The automaton can be an NFA, so every matching transition is a candidate path.
    for (int i = 0; i < transitionCount; i++) {
      automaton.getTransition(state, i, scratch);
      if (scratch.min <= wanted && wanted <= scratch.max) {
        if (recursivelyValidate(acceptString, acceptStringIndex + 1, scratch.dest, automaton)) {
          return true;
        }
      }
    }
    return false;
  }

  /*
   * This method checks if strings that lead to the accept state of the not flattened TokenStream
   * also lead to the accept state in the flattened TokenStream. This gets complicated when you
   * factor in holes. The FlattenGraphFilter will remove alternate paths that are made entirely of
   * holes. An alternate path of Holes is indistinguishable from a path that just has long
   * lengths(ex: testStrangelyNumberedNodes). Also alternate paths that end in multiple holes could
   * be interpreted as sequential holes after the branching has converged during flattening. This
   * leads to a lot of weird logic about navigating around holes that may compromise the accuracy of
   * this test.
   *
   * @param flattened flattened TokenStream
   * @param notFlattened not flattened TokenStream
   * @throws IOException on error creating Automata
   */
  /* private void checkGeneralization(TokenStream flattened, TokenStream notFlattened)
      throws IOException {
    TokenStreamToAutomaton tsta = new TokenStreamToAutomaton();

    List<LinkedList<Integer>> acceptStrings = getAcceptStrings(tsta.toAutomaton(notFlattened));
    checkAcceptStrings(acceptStrings, tsta.toAutomaton(flattened));
    flattened.close();
    notFlattened.close();
  }*/

  /*
   * gets up to 10000 strings that lead to accept state in the given automaton.
   *
   * @param automaton automaton
   * @return list of accept sequences
   */
  /* private List<LinkedList<Integer>> getAcceptStrings(Automaton automaton) {
    List<LinkedList<Integer>> acceptedSequences = new LinkedList<>();
    LinkedList<Integer> prefix = new LinkedList<>();
    // state 0 is always the start node
    // Particularly branching automatons can create lots of possible acceptable strings. limit to
    // the first 10K
    buildAcceptStringRecursive(automaton, 0, prefix, acceptedSequences, 10000);
    return acceptedSequences;
  }*/

  /*
   * @param automaton automaton to generate strings from
   * @param state state to start at
   * @param prefix string prefix
   * @param acceptedSequences List of strings build so far.
   * @param limit maximum number of acceptedSequences.
   */
  /*private void buildAcceptStringRecursive(
      Automaton automaton,
      int state,
      LinkedList<Integer> prefix,
      List<LinkedList<Integer>> acceptedSequences,
      int limit) {
    if (acceptedSequences.size() == limit) {
      return;
    }
    if (automaton.isAccept(state)) {
      acceptedSequences.add(new LinkedList<>(prefix));
      return;
    }
    int numTransitions = automaton.getNumTransitions(state);
    Transition transition = new Transition();
    for (int i = 0; i < numTransitions; i++) {
      automaton.getTransition(state, i, transition);
      // min and max are the same transitions made from TokenStreamToAutomaton
      prefix.addLast(transition.min);
      buildAcceptStringRecursive(automaton, transition.dest, prefix, acceptedSequences, limit);
      prefix.removeLast();
    }
  }

  private void checkAcceptStrings(List<LinkedList<Integer>> acceptSequence, Automaton automaton) {
    for (LinkedList<Integer> acceptString : acceptSequence) {
      assertTrue(
          "String did not lead to accept state " + acceptString,
          recursivelyValidateWithHoles(acceptString, 0, automaton));
    }
  }

  private boolean recursivelyValidateWithHoles(
      LinkedList<Integer> acceptSequence, int state, Automaton automaton) {
    if (acceptSequence.isEmpty()) {
      return automaton.isAccept(state);
    }

    Integer curr = acceptSequence.pop();
    int numTransitions = automaton.getNumTransitions(state);
    Transition transition = new Transition();

    boolean accept = false;
    // Automaton can be NFA, so we need to check all matching transitions
    for (int i = 0; i < numTransitions; i++) {
      automaton.getTransition(state, i, transition);
      if (transition.min <= curr && transition.max >= curr) {
        accept = recursivelyValidateWithHoles(acceptSequence, transition.dest, automaton);
        // Factoring in flattened graphs the space covered by a hole may be bigger in the flattened
        // graph.
        // Try consuming more steps with holes.
        if (accept == false
            && transition.min == TokenStreamToAutomaton.HOLE
            && transition.max == TokenStreamToAutomaton.HOLE) {
          acceptSequence.push(TokenStreamToAutomaton.HOLE);
          acceptSequence.push(TokenStreamToAutomaton.POS_SEP);
          accept = recursivelyValidateWithHoles(acceptSequence, transition.dest, automaton);
          acceptSequence.pop();
          acceptSequence.pop();
        }
      } else if (transition.min == TokenStreamToAutomaton.HOLE
          && transition.max == TokenStreamToAutomaton.HOLE
          && automaton.getNumTransitions(transition.dest) > 0) {
        //consume multiple holes in the automaton
        // clear POS_INC
        automaton.getTransition(transition.dest, 0, transition);
        acceptSequence.push(curr);
        accept = recursivelyValidateWithHoles(acceptSequence, transition.dest, automaton);
        acceptSequence.pop();
      } else if(curr == TokenStreamToAutomaton.HOLE) {
        //consume non-holes in the automaton with holes
        while (transition.min != TokenStreamToAutomaton.POS_SEP
                && automaton.getNumTransitions(transition.dest) > 0) {
          automaton.getTransition(transition.dest, 0, transition);
        }
        acceptSequence.push(curr);
        accept = recursivelyValidateWithHoles(acceptSequence, transition.dest, automaton);
        acceptSequence.pop();
      }
      if (accept) {
        break;
      }
    }
    // Flatten graph filter will remove side paths that are only Holes. Gaps may also change size as
    // graph is flattened.
    // Traverse over them if curr is a hole to make sure the gap is kept
    if (accept == false && curr == TokenStreamToAutomaton.HOLE && acceptSequence.size() > 0) {
      // get rid of the separator
      acceptSequence.pop();

      for (int i = 0; i < numTransitions; i++) {
        automaton.getTransition(state, i, transition);
        //advance to the next POS_SEP in automaton
        while (transition.min != TokenStreamToAutomaton.POS_SEP
            && automaton.getNumTransitions(transition.dest) > 0) {
          automaton.getTransition(transition.dest, 0, transition);
        }
        accept = recursivelyValidateWithHoles(acceptSequence, transition.dest, automaton);
        if (accept) {
          break;
        }
      }

      // might be multiple holes squashed under a one step path. Try burning remaining holes
      if (accept == false) {
        accept = recursivelyValidateWithHoles(acceptSequence, state, automaton);
      }

      acceptSequence.push(TokenStreamToAutomaton.POS_SEP);
    }
    acceptSequence.push(curr);
    return accept;
  } */

  // NOTE: TestSynonymGraphFilter's testRandomSyns also tests FlattenGraphFilter
}
